CHARTS

Better Life Index

Biplots

Photo by Nilay Sozbir on Unsplash

Photo by Nilay Sozbir on Unsplash

Karaköy, Istanbul, Turkey

We have always held to the hope, the belief,
the conviction that there is a better life, a better world, beyond the horizon…
— Franklin D. Roosevelt


Ingest

countries, indicators, and measures

# Assemble the location of the Better Life Index CSV from the shared data
# root and the dataset's path beneath it.
url_root <- "https://raw.githubusercontent.com/UN-AVT/kamino-source/main/sources/0-shared/data/"
url_file <- "better-life-index/better-life-index.csv"
url <- paste0(url_root, url_file)

# Read the CSV straight from the URL, declaring its strings as UTF-8.
df <- read.csv(file = url, encoding = "UTF-8")

# Show the raw table.
df

Wrangle

filter indicators, transform long to wide, clean

  • Educational attainment
  • Student skills
  • Years in education
# Keep only the three education indicators (edit or drop this filter to
# analyse a different set of indicators).
df_filter <- df %>%
  filter(
    Indicator %in% c(
      "Educational attainment",
      "Student skills",
      "Years in education"
    )
  )

# Build a unique name per measure by combining the indicator with its
# inequality group and unit.
df_filter <- df_filter %>%
  mutate(Indicator_Long = paste0(Indicator, "-", Inequality, "-", Unit))

# Keep only the fields needed for the reshape.
df_select <- df_filter %>%
  select(Country, Indicator_Long, Value)

# Reshape from long to wide: one row per country, one column per measure.
# pivot_wider() replaces the superseded spread(). Sorting by Country first
# and setting names_sort = TRUE keeps rows and columns in sorted order, as
# spread() returned them.
df_wide <- df_select %>%
  arrange(Country) %>%
  pivot_wider(
    names_from = Indicator_Long,
    values_from = Value,
    names_sort = TRUE
  )

# The PCA fit accepts numeric values only, so move Country out of the
# columns and into the row names.
df_wide <- column_to_rownames(df_wide, var = "Country")

# Keep only countries that have a value for every measure.
df_wide <- df_wide[complete.cases(df_wide), ]

# Normalise the column names with janitor::clean_names().
df_wrangle <- df_wide %>% janitor::clean_names()
df_wrangle

Wrangle

filter to create groups for men, women, total, and combined

Men measures:

  • educational_attainment_men_percentage
  • student_skills_men_average_score
  • years_in_education_men_years

Women measures:

  • educational_attainment_women_percentage
  • student_skills_women_average_score
  • years_in_education_women_years

Total measures:

  • educational_attainment_total_percentage
  • student_skills_total_average_score
  • years_in_education_total_years
# Split the cleaned measures into one data frame per group.

# Men measures
df_men <- select(
  df_wrangle,
  educational_attainment_men_percentage,
  student_skills_men_average_score,
  years_in_education_men_years
)

# Women measures
df_women <- select(
  df_wrangle,
  educational_attainment_women_percentage,
  student_skills_women_average_score,
  years_in_education_women_years
)

# Total measures
df_total <- select(
  df_wrangle,
  educational_attainment_total_percentage,
  student_skills_total_average_score,
  years_in_education_total_years
)

# Men and women measures combined (men columns first, then women)
df_men_and_women <- select(
  df_wrangle,
  educational_attainment_men_percentage,
  student_skills_men_average_score,
  years_in_education_men_years,
  educational_attainment_women_percentage,
  student_skills_women_average_score,
  years_in_education_women_years
)

Analytics

fit a principal components model

# Fit a principal components model for each group of measures.
# Every variable is centered and scaled to unit variance before the fit;
# the measures are expressed in different units (percentage, average score,
# years), so unscaled variances would not be comparable.
# The scaling argument of prcomp() is named `scale.` (with a trailing dot);
# it is spelled out here instead of relying on partial argument matching.
df_pca_men <- prcomp(x = df_men, center = TRUE, scale. = TRUE)
# df_pca_men

df_pca_women <- prcomp(x = df_women, center = TRUE, scale. = TRUE)
# df_pca_women

df_pca_total <- prcomp(x = df_total, center = TRUE, scale. = TRUE)
# df_pca_total

df_pca_men_and_women <- prcomp(
  x = df_men_and_women,
  center = TRUE,
  scale. = TRUE
)

# Print the standard deviations and rotation (loadings) of the combined model.
df_pca_men_and_women
## Standard deviations (1, .., p=6):
## [1] 1.9750312 1.2020418 0.7238546 0.2918286 0.1585632 0.1416892
## 
## Rotation (n x k) = (6 x 6):
##                                                PC1         PC2        PC3
## educational_attainment_men_percentage   -0.4121305  0.38709256  0.4448393
## student_skills_men_average_score        -0.4504344  0.09840132 -0.5939604
## years_in_education_men_years            -0.3162533 -0.62367793  0.1385072
## educational_attainment_women_percentage -0.4245037  0.37489496  0.3891020
## student_skills_women_average_score      -0.4728868  0.06123156 -0.4584192
## years_in_education_women_years          -0.3513059 -0.55426243  0.2619090
##                                                 PC4        PC5        PC6
## educational_attainment_men_percentage   -0.30763610 -0.5290686 -0.3284391
## student_skills_men_average_score         0.04364306 -0.4174911  0.5083636
## years_in_education_men_years            -0.65315315  0.1655987  0.1944055
## educational_attainment_women_percentage  0.19842779  0.5455257  0.4368960
## student_skills_women_average_score      -0.00611875  0.4178820 -0.6227501
## years_in_education_women_years           0.66138888 -0.2148044 -0.1311671

Plot

# Each biplot is built the same way: fviz_pca() returns a ggplot2 object,
# a black-and-white theme and a title are layered on top, and girafe()
# draws the result on a 16 x 9 SVG with rescaling enabled.

# Men
v1 <- fviz_pca(df_pca_men, repel = TRUE, labelsize = 3)
v1 <- v1 + theme_bw() + labs(title = "Better Life Index, Education, Men")

girafe(
  ggobj = v1,
  width_svg = 16,
  height_svg = 9,
  options = list(opts_sizing(rescale = TRUE, width = 1.0))
)

# Women
v2 <- fviz_pca(df_pca_women, repel = TRUE, labelsize = 3)
v2 <- v2 + theme_bw() + labs(title = "Better Life Index, Education, Women")

girafe(
  ggobj = v2,
  width_svg = 16,
  height_svg = 9,
  options = list(opts_sizing(rescale = TRUE, width = 1.0))
)

# Total
v3 <- fviz_pca(df_pca_total, repel = TRUE, labelsize = 3)
v3 <- v3 + theme_bw() + labs(title = "Better Life Index, Education, Total")

girafe(
  ggobj = v3,
  width_svg = 16,
  height_svg = 9,
  options = list(opts_sizing(rescale = TRUE, width = 1.0))
)

# Men and women combined
v4 <- fviz_pca(df_pca_men_and_women, repel = TRUE, labelsize = 3)
v4 <- v4 +
  theme_bw() +
  labs(title = "Better Life Index, Education, Men & Women")

girafe(
  ggobj = v4,
  width_svg = 16,
  height_svg = 9,
  options = list(opts_sizing(rescale = TRUE, width = 1.0))
)

References

The citations and data sources used for this case

  • Narrative and Data Source, OECD